/**********************************************************************/
/*
   Author: Karan Makkar
   Created: October 2023
   Description: Cross check report win/apply in SUS/SAK/Survey
*/
/**********************************************************************/

/*----------------------------------------------------*/
* Section: Setup
/*----------------------------------------------------*/

  * Start from an empty dataset, turn off output paging, set matrix size
  clear
  set more off
  set matsize 11000

  * Load the project filepath globals, skipped when the global master_run is 1
  * (presumably set by a master do-file that already included them - confirm)
  if "${master_run}" != "1" include "./Do/SET_FILEPATHS.do"

  * Close any open log, then open a text log whose filename starts with
  * the current date formatted as %tdCYND
  capture log close
  global prefix : display %tdCYND td(`c(current_date)')
  log using "${KP_logs}/${prefix}_demographics_match_clean.txt", text replace

  * Switch: when 1, the admin section below rebuilds the saved admin extracts
  local admin 1

/*----------------------------------------------------*/
* Section: Run programs and set dataset filepaths
/*----------------------------------------------------*/

* Set filepaths for dataset
  * Sakernas waves: one global per wave, named after the wave
  foreach wave in aug20 feb21 aug21 {
    global `wave' "${KP_deid_sakernas}/Clean/sak_`wave'_deid_clean_merged.dta"
  }

  * Susenas waves: same naming pattern under the Susenas folder
  foreach wave in sep20 mar21 sep21 mar22 {
    global `wave' "${KP_deid_susenas}/Clean/sus_`wave'_deid_clean_merged.dta"
  }


/*----------------------------------------------------*/
* Make match tables
/*----------------------------------------------------*/

  /************
   Save Admin Data Tempfiles
   ************/
   * Build the two one-row-per-anon_id4 admin extracts (admin_s21, admin_s22)
   * that the survey, Sakernas and Susenas sections merge in from disk.
   * Runs only when the admin switch local equals 1.
   if `admin' == 1 {
     use "$KP_deid_admin/Clean/pmo_b1-22_clean_long_deid.dta", clear

     ** 2021 extract: built under preserve so the long file is back in memory
     ** for the 2022 extract
     preserve
       gen diff = abs(17-batch)
       * Keep one row per anon_id4. batch is the first sort key, so the row
       * kept is the one with the lowest batch. diff is a function of batch
       * and therefore never changes which row is kept; it only enters the
       * saved file and its signature.
       * NOTE(review): if the intent was the row closest to batch 17, the
       * sort keys would need to be (diff batch). Left as is because that
       * would change the dataset the signature below is pinned to.
       bysort anon_id4 (batch diff): keep if _n == 1

       * Save only if the data reproduce the recorded signature
       datasignature
       if "`r(datasignature)'" == "23434700:73(35628):1852873270:3925493759" {
         save "$KP_deid_admin/Clean/admin_s21", replace
       }
       else {
         di as err "Careful, your machine produces a different dataset"
         * Abort with a real return code. The previous bare word stop only
         * halted the run because it is not a recognised command.
         error 459
       }
     restore

     ** 2022 extract: same row selection, diff measured from batch 22
     gen diff = abs(22-batch)
     bysort anon_id4 (batch diff): keep if _n == 1

     datasignature
     if "`r(datasignature)'" == "23434700:73(35628):3825558789:2060303813" {
       save "$KP_deid_admin/Clean/admin_s22", replace
     }
     else {
       di as err "Careful, your machine produces a different dataset"
       error 459
     }

   }
  
  /************
   2021 Survey
   ************/
  use "$KP_deid_survey/2021/Clean/071323_survey_21_deid_clean_merged.dta", clear

  * Restrict to rows that were sent the survey or have any recorded progress
  drop if sent_survey != 1 & missing(progress)

  * Respondent flag: completed, consented and verified are all 1
  generate respondent21 = (completed == 1) & (consented == 1) & (verified == 1)

  * Snapshot used by the Sakernas and Susenas survey-overlap merges
  tempfile s21
  save `s21'

  * Free up the names year_dob and gender for the admin versions, and
  * recode survey gender 2 to 0
  rename year_dob year_dob_21
  replace gender_survey = 0 if gender_survey == 2
  drop gender

  * Bring in admin demographics; only ids found in both files are kept
  fmerge 1:1 anon_id4 using "$KP_deid_admin/Clean/admin_s21", assert(2 3) keep(3) nogen keepusing(year_dob anon_prov_id gender)

  * Agreement indicators between admin and survey values.
  * Stata treats two missing values as equal, so the equality-based
  * indicators are 1 when both sides are missing; a missing birth year
  * gives agematch 0.
  * NOTE(review): nothing in this section saves or tabulates these
  * indicators before the next section reloads data with clear.
  generate agematch      = abs(year_dob - year_dob_21) <= 2
  generate gendermatch   = gender == gender_survey
  generate provincematch = anon_prov_id == anon_prov_id_21
  generate match2        = agematch == 1 & gendermatch == 1
  generate match3        = match2 == 1 & provincematch == 1

  label variable agematch      "Age Match"
  label variable gendermatch   "Gender Match"
  label variable provincematch "Province Match"
  label variable match2        "Age, Gender Match"
  label variable match3        "Age, Gender, Province Match"
  
  /************
   2022 Survey
   ************/
  use "$KP_deid_survey/2022/Clean/071323_survey_22_deid_clean_merged.dta", clear

  * Restrict to rows that were sent the survey or have any recorded progress
  * (same filter as the 2021 section; it is not limited to respondents)
  drop if sent_survey != 1 & missing(progress)

  * Respondent flag: completed, consented and verified are all 1
  generate respondent22 = (completed == 1) & (consented == 1) & (verified == 1)

  * One row per anon_id4.
  * NOTE(review): there is no secondary sort key, so which duplicate row
  * survives is arbitrary - confirm that duplicates agree on respondent22.
  bysort anon_id4: drop if _n > 1

  * Snapshot used by the Sakernas and Susenas survey-overlap merges
  tempfile s22
  save `s22', replace

  * Free up the names year_dob and gender for the admin versions, and
  * recode survey gender 2 to 0
  rename year_dob year_dob_22
  replace gender_survey = 0 if gender_survey == 2
  drop gender

  * Bring in admin demographics; only ids found in both files are kept
  fmerge 1:1 anon_id4 using "$KP_deid_admin/Clean/admin_s22", assert(2 3) keep(3) nogen keepusing(year_dob anon_prov_id gender)

  * Agreement indicators between admin and survey values.
  * Two missing values compare equal in Stata, so gendermatch is 1 when
  * both genders are missing; a missing birth year gives agematch 0.
  generate agematch    = abs(year_dob - year_dob_22) <= 2
  generate gendermatch = gender == gender_survey
  generate match2      = agematch == 1 & gendermatch == 1

  label variable agematch    "Age Match"
  label variable gendermatch "Gender Match"
  label variable match2      "Age, Gender Match"

  /************
   Sakernas
   ************/
   * Stack the three Sakernas waves (paths held in the aug20, feb21 and
   * aug21 globals).
   * The statement order below is deliberate: the signature check covers
   * every variable in the dataset, so creation order is left untouched.
   use "${aug20}", clear
   append using "${feb21}"
   append using "${aug21}"

   * Win before survey: take the ever_win variable of each round's cutoff
   * batch (3, 11, 17 for sak_round 5, 6, 7) on rows whose batch lies in
   * 2..cutoff, then spread the maximum over the person-round
   gen win_before_survey = .
   replace win_before_survey  = ever_win_3 if sak_round == 5 & inrange(batch, 2, 3)
   replace win_before_survey  = ever_win_11 if sak_round == 6 & inrange(batch, 2, 11)
   replace win_before_survey  = ever_win_17 if sak_round == 7 & inrange(batch, 2, 17)
   gegen win_before_survey = max(win_before_survey), by(anon_id4 sak_round) replace

   * Cash before survey: 1 when the batch lies in the round's range and
   * date_incentive is on or before the listed date; a missing
   * date_incentive never qualifies. Then spread the maximum over the
   * person-round.
   gen cash_before_survey = 0
   replace cash_before_survey  = 1 if sak_round == 5 & inrange(batch, 2, 3) & date_incentive <= date("2020/7/31", "YMD")
   replace cash_before_survey  = 1 if sak_round == 6 & inrange(batch, 2, 11) & date_incentive <= date("2021/1/31", "YMD")
   replace cash_before_survey  = 1 if sak_round == 7 & inrange(batch, 2, 17) & date_incentive <= date("2021/7/31", "YMD")
   gegen cash_before_survey = max(cash_before_survey), by(anon_id4 sak_round) replace

   * Apply before survey: 1 when first_apply_batch is at or below the
   * round's cutoff batch (a missing first_apply_batch stays 0)
   gen apply_before_survey = 0
   replace apply_before_survey  = 1 if sak_round == 5 & first_apply_batch <=3
   replace apply_before_survey  = 1 if sak_round == 6 & first_apply_batch <=11
   replace apply_before_survey  = 1 if sak_round == 7 & first_apply_batch <=17

   * One row per person-round. batch is the first sort key, so the lowest
   * batch is kept; diff is a function of batch and never changes which
   * row is kept, but it is part of the signature computed below.
   gen diff = abs(17-batch)
   bysort anon_id4 sak_round (batch diff): keep if _n ==1

   * Halt unless the data reproduce the recorded signature
   datasignature
   if "`r(datasignature)'" == "181331:225(67844):4155441897:2868558666" {
     di "Matched dataset looks good"
   }
   else {
     di as err "Careful, your machine produces a different dataset"
     * Abort with a real return code. The previous bare word stop only
     * halted the run because it is not a recognised command.
     error 459
   }
   drop diff

  * Survey overlap: flag person-rounds that also appear as respondents in
  * the 2021 or 2022 survey snapshots.
  * NOTE(review): these two merges key on anon_id, while the admin merges
  * in this script key on anon_id4 - confirm this is intended.
  fmerge m:1 anon_id using `s21', keep(1 3) keepusing(respondent21) gen(m21)
  fmerge m:1 anon_id using `s22', keep(1 3) keepusing(respondent22) gen(m22)
  generate survey_resp = (respondent21 == 1) | (respondent22 == 1)

  * Province: attach the anonymised province id through the crosswalk
  rename kode_prov prov_id_sak
  merge m:1 prov_id_sak using "${KP_deid_misc}/Clean/provid_anon_crosswalk", nogen assert(1 3) keepusing(anon_prov_id_sak)

  * Gender: 1 when female is 0. The existing gender variable is dropped so
  * the admin merge further down can bring in its own.
  generate gender_sak = (female == 0)
  drop gender

  * Education, step 1: bring the round 7 codes in line with the other waves
  * (6, 7, 8 each move down by one; 9 through 12 collapse to 8)
  replace educ = educ - 1 if inlist(educ, 6, 7, 8) & sak_round == 7
  replace educ = 8 if inrange(educ, 9, 12) & sak_round == 7

  * Education, step 2: collapse both sources to the same four levels
  *   1 Elementary or lower, 2 Junior high,
  *   3 Senior high school + vocational, 4 Above
  * Any value outside the listed ranges, including missing, becomes missing.
  recode educ      (1/2 = 1) (3 = 2) (4/5 = 3) (6/8 = 4) (else = .), generate(educ_sak)
  recode education (1 = 1)   (2 = 2) (3 = 3)   (4/8 = 4) (else = .), generate(educ_pmo)

  * Admin gender, taken from the 2021 admin extract for every Sakernas round
  fmerge m:1 anon_id4 using "$KP_deid_admin/Clean/admin_s21", assert(2 3) keep(3) nogen keepusing(gender)

  * Agreement indicators. The creation order is kept as is because some of
  * these variables go into the signature-checked file saved next.
  * Two missing values compare equal in Stata, so the equality-based
  * indicators are 1 when both sides are missing; a missing birth year
  * gives agematch 0.
  generate agematch      = abs(year_dob - year_dob_sak) <= 2
  generate gendermatch   = gender == gender_sak
  generate provincematch = anon_prov_id == anon_prov_id_sak
  generate educmatch     = educ_pmo == educ_sak
  generate match2        = agematch == 1 & gendermatch == 1
  generate match3        = match2 == 1 & provincematch == 1
  generate match4        = match3 == 1 & educmatch == 1
  generate match3_sus    = match2 == 1 & educmatch == 1

  label variable agematch      "Age Match"
  label variable gendermatch   "Gender Match"
  label variable provincematch "Province Match"
  label variable educmatch     "Educ Match"
  label variable match2        "Age, Gender Match"
  label variable match3        "Age, Gender, Province Match"
  label variable match4        "Age, Gender, Province, Educ Match"
  label variable match3_sus    "Age, Gender, Educ Match"

  * Save the Sakernas match indicators, one row per anon_id4 and sak_round.
  * Only the ids and indicators are kept; the file is written only when
  * the data reproduce the recorded signature.
  keep anon_id4 sak_round match3_sus agematch gendermatch provincematch educmatch
  datasignature
  if "`r(datasignature)'" == "181331:7(5777):3122772265:1877223520" {
    save "$KP_deid_sakernas/Clean/age_gender_educ_match_ids.dta", replace
  }
  else {
    di as err "Careful, your machine produces a different dataset"
    * Abort with a real return code. The previous bare word stop only
    * halted the run because it is not a recognised command.
    error 459
  }

  /************
   Susenas
   ************/
   * Stack the four Susenas waves (paths held in the wave-named globals)
   use "${sep20}", clear
   foreach wave in mar21 sep21 mar22 {
     append using "${`wave'}"
   }

   * Parallel lists, one entry per Susenas round: round number, last batch
   * counted for that round, and latest incentive date that counts
   local sus_rounds   5 6 7 8
   local last_batches 5 11 18 22
   local cash_cutoffs 2020/8/30 2021/2/28 2021/8/30 2022/2/28

   * Win before survey: take the ever_win variable of the round's last
   * batch on rows whose batch lies in 2..last batch, then spread the
   * maximum over the person-round
   generate win_before_survey = .
   forvalues i = 1/4 {
     local r : word `i' of `sus_rounds'
     local b : word `i' of `last_batches'
     replace win_before_survey = ever_win_`b' if sus_round == `r' & inrange(batch, 2, `b')
   }
   gegen win_before_survey = max(win_before_survey), by(sus_round anon_id4) replace

   * Cash before survey: 1 when the batch lies in the round's range and
   * date_incentive is on or before the round's cutoff date, then spread
   * the maximum over the person-round
   generate cash_before_survey = 0
   forvalues i = 1/4 {
     local r : word `i' of `sus_rounds'
     local b : word `i' of `last_batches'
     local d : word `i' of `cash_cutoffs'
     replace cash_before_survey = 1 if sus_round == `r' & inrange(batch, 2, `b') & date_incentive <= date("`d'", "YMD")
   }
   gegen cash_before_survey = max(cash_before_survey), by(anon_id4 sus_round) replace

   * One row per person-round. batch is the first sort key, so the lowest
   * batch is kept; diff is a function of batch and never changes which
   * row is kept.
   generate diff = abs(17-batch)
   bysort anon_id4 sus_round (batch diff): drop if _n > 1
   drop diff

  * Survey overlap: flag person-rounds that also appear as respondents in
  * the 2021 or 2022 survey snapshots.
  * NOTE(review): these two merges key on anon_id, while the admin merges
  * in this script key on anon_id4 - confirm this is intended.
  fmerge m:1 anon_id using `s21', keep(1 3) keepusing(respondent21) gen(m21)
  fmerge m:1 anon_id using `s22', keep(1 3) keepusing(respondent22) gen(m22)
  generate survey_resp = (respondent21 == 1) | (respondent22 == 1)

  * Gender: 1 when female is 0. The existing gender variable is dropped so
  * the admin merge further down can bring in its own.
  generate gender_sus = (female == 0)
  drop gender

  * Education (Susenas): the raw educ codes differ by round, so each of the
  * four harmonised levels lists its code range for every round.
  * Codes outside the listed ranges stay missing.
  generate educ_sus = .

  * 1 = Elementary or lower
  replace educ_sus = 1 if (sus_round == 5 & inrange(educ, 1, 2)) ///
                        | (sus_round == 6 & (inrange(educ, 1, 4) | educ == 22)) ///
                        | (sus_round == 7 & (inrange(educ, 1, 5) | educ == 25)) ///
                        | (sus_round == 8 & inrange(educ, 1, 5))

  * 2 = Junior high
  replace educ_sus = 2 if (sus_round == 5 & educ == 3) ///
                        | (sus_round == 6 & inrange(educ, 5, 8)) ///
                        | (inlist(sus_round, 7, 8) & inrange(educ, 6, 10))

  * 3 = Senior high school + vocational
  replace educ_sus = 3 if (sus_round == 5 & educ == 4) ///
                        | (sus_round == 6 & inrange(educ, 9, 14)) ///
                        | (inlist(sus_round, 7, 8) & inrange(educ, 11, 16))

  * 4 = Above vocational
  replace educ_sus = 4 if (sus_round == 5 & educ == 5) ///
                        | (sus_round == 6 & inrange(educ, 15, 21)) ///
                        | (inlist(sus_round, 7, 8) & inrange(educ, 17, 24))

  * Education (PMO admin): levels 1 to 3 as coded, 4 through 8 collapse
  * to 4, anything else (including missing) becomes missing
  recode education (1 = 1) (2 = 2) (3 = 3) (4/8 = 4) (else = .), generate(educ_pmo)

  * Admin gender, taken from the 2021 admin extract for every Susenas round
  fmerge m:1 anon_id4 using "$KP_deid_admin/Clean/admin_s21", assert(2 3) keep(3) nogen keepusing(gender)

  * Combined selection report: hh_pk_win in rounds 6 and 7, get_pk in
  * rounds 5 and 8
  generate pk_win_comb = hh_pk_win if inlist(sus_round, 6, 7)
  replace pk_win_comb = get_pk if inlist(sus_round, 5, 8)
  label variable pk_win_comb "Report Selected"

  * Agreement indicators. The creation order is kept as is because some of
  * these variables go into the signature-checked file saved next.
  * Two missing values compare equal in Stata, so the equality-based
  * indicators are 1 when both sides are missing; a missing birth year
  * gives agematch 0.
  generate agematch    = abs(year_dob - year_dob_sus) <= 2
  generate gendermatch = gender == gender_sus
  generate educmatch   = educ_pmo == educ_sus
  generate match2      = agematch == 1 & gendermatch == 1
  generate match3_sus  = match2 == 1 & educmatch == 1

  label variable agematch    "Age Match"
  label variable gendermatch "Gender Match"
  label variable educmatch   "Educ Match"
  label variable match2      "Age, Gender Match"
  label variable match3_sus  "Age, Gender, Educ Match"

  * Save the Susenas match indicators, one row per anon_id4 and sus_round.
  * Only the ids and indicators are kept; the file is written only when
  * the data reproduce the recorded signature.
  keep anon_id4 sus_round match3_sus agematch gendermatch educmatch

  datasignature
  if "`r(datasignature)'" == "212608:6(58476):2379144961:3420991309" {
    save "$KP_deid_susenas/Clean/age_gender_educ_match_ids.dta", replace
  }
  else {
    di as err "Careful, your machine produces a different dataset"
    * Abort with a real return code. The previous bare word stop only
    * halted the run because it is not a recognised command.
    error 459
  }